home *** CD-ROM | disk | FTP | other *** search
- title lzcomp - file compressor using limpel-ziv algorithm
-
- ;Tom Pfau
- ;Digital Equipment Corporation
- ;Parsippany, NJ
- ;
- ;v1.2, Toad Hall Tweak
- ; - Converting to COM file.
- ; - Gonna use BP to hold bit_offset throughout (faster)
- ; - Moved most buffers outside code space.
-
- ;Constants
- CLEAR equ 256 ;Clear code
- EOF equ 257 ;End of file marker
- FIRST_FREE equ 258 ;First free code
- MAXMAX equ 4096 ;Max code + 1
-
- include macros2.mlb
-
- ;Hash table entry
- hash_rec struc
- first dw ? ; First entry with this value
- next dw ? ; Next entry along chain
- char db ? ; Suffix char
- hash_rec ends
-
- ;Declare Segments
- Code segment para public 'code'
- assume CS:Code, DS:Code, ES:Code
-
- org 100H
-
- LzComp proc near
- jmp Start
-
- ;TH collecting all data segment stuff
- input_prompt db 'Input file: $'
- output_prompt db 'Output file: $'
- input_file db 80,0,80 dup (?)
- output_file db 80,0,80 dup (?)
- crlf db 13,10,'$'
- input_handle dw 0 ;?
- output_handle dw 0 ;?
-
- ;hash_seg dw hash SHR 4 ;convert to a segment?
- prefix_code dw 0 ;?
- free_code dw 0 ;?
- max_code dw 0 ;?
- nbits dw 0 ;?
- k db 0 ;?
-
- ;bit_offset dw 0 ;?
- input_offset dw 0
- input_size dw 0
- LzComp endp
-
-
- start proc near ;far
- ;TH we won't mess with any memory allocating, since I think
- ;all required buffers, hash tables, etc. can work within
- ;a lousy 64Kb segment.
- ;Won't even bother moving the stackpointer for now either.
-
- print input_prompt ;Get input file name
- input input_file
- print crlf
- print output_prompt ;And output
- input output_file
- print crlf
- ;Terminate file names with nulls
- mov al,input_file+1 ;input filename length
- xor ah,ah ;clear msb
- mov si,ax ;pointer is filename length
- mov input_file+2[si],0 ;point to last char+1, stuff 0
- mov al,output_file+1 ;output filename length
- mov si,ax ;pointer is filename length
- mov output_file+2[si],0 ;point to last char+1, stuff 0
- hopen input_file+2,0
- mov input_handle,ax ;save input file handle
- hcreat output_file+2,0
- mov output_handle,ax ;save output file handle
- call compress ;Compress file
- hclose input_handle ;Close in and out
- hclose output_handle
- exit ;Done
- start endp
-
- compress proc near
-
- l1: call init_table ;Initialize the table and some vars
- mov ax,CLEAR ;Write a clear code
- call write_code
- call read_char ;Read first char
-
- l4: xor ah,ah ;Turn char into code
- l4a: mov prefix_code,ax ;Set prefix code
- call read_char ;Read next char
- jc l17 ;Carry means EOF
- mov k,al ;Save char in k
- mov bx,prefix_code ;Get prefix code
- call lookup_code ;See if this pair in table
- jnc l4a ;nc means yes, new code in ax
- call add_code ;Add pair to table
- push bx ;Save new code
- mov ax,prefix_code ;Write old prefix code
- call write_code
- pop bx
- mov al,k ;Get last char
- cmp bx,max_code ;Exceed code size?
- jl l4 ;less means no
- cmp nbits,12 ;Currently less than 12 bits?
- jl l14 ;yes
- mov ax,CLEAR ;Write a clear code
- call write_code
- call init_table ;Reinit table
- mov al,k ;get last char
- jmp l4 ;Start over
-
- l14: inc nbits ;Increase number of bits
- shl max_code,1 ;Double max code size
- jmp l4 ;Get next char
-
- l17: mov ax,prefix_code ;Write last code
- call write_code
- mov ax,EOF ;Write EOF code
- call write_code
- mov ax,bp ;bit_offset ;Make sure buffer is flushed to file
- or ax,ax ;TH
- je l18
- mov cx,8 ;convert bits to bytes
- xor dx,dx
- div cx
- or dx,dx ;If extra bits, make sure they get
- je l17a ;written
- inc ax
- l17a: call flush
- l18: ret
- compress endp
-
- init_table proc near
- mov nbits,9 ;Set code size to 9
- mov max_code,512 ;Set max code to 512
-
- mov ax,-1 ;Unused flag
- mov cx,640 ;Clear first 256 entries
- mov di,offset hash ;TH Point to first entry
- rep stosw ;Clear it out
- mov free_code,FIRST_FREE ;Set next code to use
- ret ;done
- init_table endp
-
- write_code proc near
- push ax ;Save code
- mov ax,bp ;bit_offset ;Get bit offset
- ;TH we're keeping bit_offset in BP
- ; mov cx,nbits ;Adjust bit offset by code size
- ; add bit_offset,cx
- add bp,nbits ;TH adjust bit offset by code size
- mov cx,8 ;Convert bit offset to byte offset
- xor dx,dx
- div cx
- cmp ax,1020 ;Approaching end of buffer?
- jl wc1 ;less means no
- call flush ;Write the buffer
- ;TH we're keeping bit_offset in BP
- ; push dx ;dx contains offset within byte
- ; add dx,nbits ;adjust by code size
- ; mov bit_offset,dx ;new bit offset
- ; pop dx ;restore dx
- mov bp,dx ;TH dx contains offset within byte
- add bp,nbits ;TH adjust by code size
-
- add ax,offset output_data ;Point to last byte
- mov si,ax ;put in si
- mov al,[si] ;move byte to first position
- mov byte ptr output_data,al
- xor ax,ax ;Byte offset of zero
- wc1: add ax,offset output_data ;Point into buffer
- mov di,ax ;Destination
- pop ax ;Restore code
- mov cx,dx ;offset within byte
- xor dx,dx ;dx will catch bits rotated out
- jcxz wc3 ;If offset in byte is zero, skip shift
- wc2: shl ax,1 ;Rotate code
- rcl dx,1
- loop wc2
- or al,[di] ;Grab bits currently in buffer
- wc3: stosw ;Save data
- mov al,dl ;Grab extra bits
- stosb ;and save
- ret
- write_code endp
-
-
- flush proc near
- push ax ;Save all registers
- ;TH push bx ;AX contains number of bytes to write
- ;TH push cx
- push dx
- hwrite output_handle,output_data,ax ;write output file
- pop dx
- ;TH pop cx
- ;TH pop bx
- pop ax
- ret
- flush endp
-
- read_char proc near
- mov di,input_offset ;Anything left in buffer?
- cmp di,input_size
- jl rd1 ;less means yes
- hread input_handle,input_data,1024 ;Read next chunk of input
- or ax,ax ;TH Anything left?
- je rd2 ;equal means no, finished
-
- mov input_size,ax ;Save bytes read
- xor di,di ;TH clear DI
- mov input_offset,di ;TH 0 ;Point to beginning of buffer
- rd1:
- ;TH the mov/add instrs below are faster than the LEA.
- ; lea si,input_data[di] ;Point at character
- mov si,offset input_data ;TH
- add si,di ;Point at char
- lodsb ;Read it in
- inc input_offset ;Adjust pointer
- clc ;Success
- ret
-
- rd2: stc ;Nothing left
- ret
- read_char endp
-
-
- lookup_code proc near
- call index ;convert code to address
- xor di,di ;TH ;flag
- cmp [si].first,-1 ;Has this code been used?
- je gc4 ;equal means no
-
- inc di ;set flag
- mov bx,[si].first ;Get first entry
- gc2: call index ;convert code to address
- cmp [si].char,al ;is char the same?
- jne gc3 ;ne means no
- clc ;success
- mov ax,bx ;put found code in ax
- ret ;done
-
- gc3: cmp [si].next,-1 ;More left with this prefix?
- je gc4 ;equal means no
- mov bx,[si].next ;get next code
- jmp gc2 ;try again
-
- gc4: stc ;not found
- ret ;done
- lookup_code endp
-
-
- index proc near
- mov si,bx ;si = bx * 5 (5 byte hash entries)
- shl si,1 ;si = bx * 2 * 2 + bx
- shl si,1
- add si,bx
- add si,offset hash ;TH plus hash table base
- ret
- index endp
-
- add_code proc near
- ;Only called once
- mov bx,free_code ;Get code to use
- or di,di ;TH ;First use of this prefix?
- je ac1 ;equal means yes
- mov [si].next,bx ;point last use to new entry
- jmp short ac2
-
- ac1: mov [si].first,bx ;Point first use to new entry
- ac2: cmp bx,MAXMAX ;Have we reached code limit?
- je ac3 ;equal means yes, just return
- call index ;get address of new entry
- ;TH switched around a little
- mov [si].char,al ;save suffix char
- mov ax,-1 ;TH do this once (ok to destroy AX)
- mov [si].first,ax ;-1 ;initialize pointers
- mov [si].next,ax ;-1
- inc free_code ;TH adjust next code
- ac3: ret
- add_code endp
-
- ;TH input/output buffers moved here outside code space
- even
-
- output_data equ $ ;db 1024 dup (?)
- input_data equ output_data+1024 ;db 1024 dup (?)
-
- ;Instead of using a separate segment for memory and the hash
- ;table (with all the resultant segment register fiddling),
- ;gonna just use a dynamic buffer right in code space.
-
- hash equ input_data+1024
-
- code ends
- end LzComp